# -*- coding: UTF-8 -*-
__author__ = 'Jinkey'


# =================================================================
# Corpus download
#
# Open NLTK's interactive downloader so corpora can be installed.
# ``nltk.download()`` with no arguments launches the same Tk GUI as
# ``nltk.download_gui()``, but falls back to the text-mode downloader
# when Tk is unavailable (e.g. on a headless machine) instead of failing.
import nltk
nltk.download()


# ====================================================== Corpus imports
# from nltk import *
# from nltk.corpus import gutenberg
# from nltk.corpus import webtext  # web text corpus
# from nltk.corpus import nps_chat  # chat corpus
# from nltk.corpus import brown  # Brown corpus
# from nltk.corpus import reuters  # Reuters corpus
# from nltk.corpus import inaugural  # inaugural addresses
#
#
# # =============================================== Ways to access corpus content
# fileids = gutenberg.fileids()  # get the corpus file ids
# for fileid in fileids:
#     print gutenberg.raw()  # raw text
#     print gutenberg.words()  # word tokens
#     print gutenberg.sents()  # sentences
# print brown.categories()  # categories

